# Multiple linear regression example: load the advertising data set and
# print a per-column summary.
ad <- read.csv(file = "C:\\Users\\GradQuant\\Desktop\\Advertising.csv")
summary(object = ad)

# Visualization: a 3-D scatter plot of columns 3 to 5, followed by a
# scatterplot matrix of columns 2 to 5.
library(scatterplot3d)
scatterplot3d(ad[3:5])

pairs(ad[2:5])

# Linear models
#
# Variables are written as bare column names and resolved through
# `data = ad` instead of `ad$...` inside the formula. With `ad$TV` in the
# formula the fitted model is tied to the global vector, so
# predict(model, newdata = ...) silently ignores `newdata`, and the
# coefficient labels come out as "ad$TV" rather than "TV".

# Simple regression: sales on TV only.
model1 <- lm(sales ~ TV, data = ad)
summary(model1)

# Additive model with all three predictors.
model2 <- lm(sales ~ TV + radio + newspaper, data = ad)
summary(model2)

# `TV * radio` expands to TV + radio + TV:radio (main effects plus
# their interaction).
model3 <- lm(sales ~ TV * radio, data = ad)
summary(model3)


# PCA example
wdbc <- read.csv("C:\\Users\\GradQuant\\Desktop\\wdbcdata.csv")

# PCA on columns 3 to 32, centred and scaled to unit variance.
# The prcomp() argument is named `scale.` (with a trailing dot); writing
# `scale = TRUE` only works through partial argument matching, so the
# full name is used here.
wdbc.pr <- prcomp(wdbc[, 3:32], center = TRUE, scale. = TRUE)
summary(wdbc.pr)

# Scree plot of the variances of the first 15 principal components
screeplot(wdbc.pr, type = "l", npcs = 15,
          main = "Screeplot of the first 15 PCs")

# Cumulative proportion of variance explained by the components
cumpro <- cumsum(wdbc.pr$sdev^2 / sum(wdbc.pr$sdev^2))
# R vectors are 1-indexed: index 0 is silently dropped, so 1:15 is the
# explicit way to take the first 15 values.
plot(cumpro[1:15], xlab = "PC #", ylab = "Amount of explained variance",
     main = "Cumulative variance plot")
abline(v = 6, col = "blue", lty = 5)
# Draw the horizontal guide at the cumulative proportion actually reached
# at PC6 rather than a constant copied from one particular run.
abline(h = cumpro[6], col = "blue", lty = 5)
legend("topleft", legend = c("Cut-off @ PC6"), col = c("blue"), lty = 5,
       cex = 0.6)

# Scatter plot of the scores on the first two principal components
plot(wdbc.pr$x[, 1], wdbc.pr$x[, 2], xlab = "PC1 (44.3%)",
     ylab = "PC2 (19%)", main = "PC1 / PC2 - plot")


# Since R 4.0 read.csv() returns text columns as character, not factor.
# as.numeric() on a character label yields NA (with a warning), and glm()
# with family = binomial rejects a character response. Converting to a
# factor restores the intended coding; as.factor() leaves an existing
# factor untouched, so this is also safe on older R versions.
wdbc$diagnosis <- as.factor(wdbc$diagnosis)

# New data frame holding the scores on the first six PCs plus the outcome
wdbc.pc <- as.data.frame(wdbc.pr$x[, 1:6])
# Factor codes start at 1, so subtracting 1 maps the first level to 0 and
# the second level to 1.
wdbc.pc$diagnosis <- as.numeric(wdbc$diagnosis) - 1

# Logistic regression on the six principal components
gmodel <- glm(diagnosis ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
              family = binomial, data = wdbc.pc)
summary(gmodel)

# Logistic regression on the original data set: every column except `id`
# is used as a predictor.
gmodel2 <- glm(diagnosis ~ . - id, family = binomial, data = wdbc)
summary(gmodel2)


